In [1]:
# Import the needed libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from copy import copy
from scipy import stats
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
In [2]:
# Read the data and View it 
stocks_df
Out[2]:
Date AAPL AMZN JPM MGM TSLA VOO VZ
0 2012-01-03 12.669562 179.029999 27.060522 10.501117 5.616000 97.755295 26.497862
1 2012-01-04 12.737655 177.509995 27.231941 10.434954 5.542000 97.922981 26.151049
2 2012-01-05 12.879071 177.610001 27.800735 10.520023 5.424000 98.241592 25.970974
3 2012-01-06 13.013705 182.610001 27.551395 10.520023 5.382000 98.040359 25.896662
4 2012-01-09 12.993064 178.559998 27.504658 10.699608 5.450000 98.191277 25.923681
... ... ... ... ... ... ... ... ...
2222 2020-10-30 108.672516 3036.149902 98.040001 20.570000 388.040009 299.820007 56.990002
2223 2020-11-02 108.582664 3004.479980 100.250000 20.969999 400.510010 303.309998 57.700001
2224 2020-11-03 110.249794 3048.409912 103.410004 21.750000 423.899994 308.660004 57.750000
2225 2020-11-04 114.752022 3241.159912 100.250000 21.440001 420.980011 315.609985 57.220001
2226 2020-11-05 118.824997 3322.000000 104.349998 21.840000 438.089996 321.790009 58.150002

2227 rows × 8 columns

In [3]:
# Sort the Date column values
stocks_df.sort_values(['Date'])
Out[3]:
Date AAPL AMZN JPM MGM TSLA VOO VZ
0 2012-01-03 12.669562 179.029999 27.060522 10.501117 5.616000 97.755295 26.497862
1 2012-01-04 12.737655 177.509995 27.231941 10.434954 5.542000 97.922981 26.151049
2 2012-01-05 12.879071 177.610001 27.800735 10.520023 5.424000 98.241592 25.970974
3 2012-01-06 13.013705 182.610001 27.551395 10.520023 5.382000 98.040359 25.896662
4 2012-01-09 12.993064 178.559998 27.504658 10.699608 5.450000 98.191277 25.923681
... ... ... ... ... ... ... ... ...
2222 2020-10-30 108.672516 3036.149902 98.040001 20.570000 388.040009 299.820007 56.990002
2223 2020-11-02 108.582664 3004.479980 100.250000 20.969999 400.510010 303.309998 57.700001
2224 2020-11-03 110.249794 3048.409912 103.410004 21.750000 423.899994 308.660004 57.750000
2225 2020-11-04 114.752022 3241.159912 100.250000 21.440001 420.980011 315.609985 57.220001
2226 2020-11-05 118.824997 3322.000000 104.349998 21.840000 438.089996 321.790009 58.150002

2227 rows × 8 columns

In [4]:
# Print all the column names
stocks_df.columns[1:] # excludes the date column
Out[4]:
Index(['AAPL', 'AMZN', 'JPM', 'MGM', 'TSLA', 'VOO', 'VZ'], dtype='object')
In [5]:
# Basic Data Analysis ------------------------------

# What was the average prices of sp500?
display(stocks_df.mean())

# Which stock had the lowest standard deviation 
display(stocks_df.std())

# What is the maximum price of AMZN?
max(stocks_df['AMZN'])
AAPL     35.659219
AMZN    976.379888
JPM      69.560247
MGM      22.088033
TSLA     61.707457
VOO     191.569250
VZ       41.922391
dtype: float64
AAPL     22.578530
AMZN    784.674107
JPM      28.601932
MGM       6.714509
TSLA     72.627088
VOO      57.540986
VZ        8.774266
dtype: float64
Out[5]:
3531.4499509999996

Data Analysis and Visulization

In [6]:
# How many null values does each column have?
stocks_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2227 entries, 0 to 2226
Data columns (total 8 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Date    2227 non-null   object 
 1   AAPL    2227 non-null   float64
 2   AMZN    2227 non-null   float64
 3   JPM     2227 non-null   float64
 4   MGM     2227 non-null   float64
 5   TSLA    2227 non-null   float64
 6   VOO     2227 non-null   float64
 7   VZ      2227 non-null   float64
dtypes: float64(7), object(1)
memory usage: 139.3+ KB
In [7]:
# Plot all stock prices versus date using a function
def show_plot(data, title):
    data.plot(x='Date', figsize=(15,6), linewidth=3, title=title)
    plt.grid()
    plt.show()

show_plot(stocks_df, 'Stock Prices over time (not normalized)')
In [8]:
# Plot the normalized stock prices over time

# Normalize the data
def normalize(data):
    x=data.copy() # Create a copy
    
    # Select every column, except Date, and divide the column by its price on 2012-01-12
    for i in x.columns[1:]:
        x[i] = data[i] / data[i][0]
    
    return x 

stocks_df_normalized = normalize(stocks_df) # Pass stocks_df to normalize()
show_plot(stocks_df_normalized, 'Stock Prices over time (Normalized)') # Plot the data
Perform interactive data visualization
In [9]:
def interactive_plot(df, title):
    
  fig = px.line(df, title=title) # create a graph object
  
  for i in df.columns[1:]: # Add scattertrace - prices for every stock
    fig.add_scatter(x = df['Date'], y = df[i], name = i) 

  fig.show() # Show me the plot
In [10]:
interactive_plot(stocks_df, 'Raw Stock Prices (not normalized)')
In [11]:
interactive_plot(stocks_df_normalized, 'Prices (Normalized)')
Calculate individual Daily Stock Returns
In [13]:
# Let's only calculate the returns of sp500
df = stocks_df['VOO']
daily_returns = df.copy() # Do the calculations using df values, overwrite daily_returns values

for i in range(1, len(daily_returns)):
    daily_returns[i] = ((df[i] - df[i-1]) / df[i-1]) * 100

daily_returns[0] = 0
display(daily_returns)
0       0.000000
1       0.171536
2       0.325369
3      -0.204835
4       0.153935
          ...   
2222   -1.124560
2223    1.164029
2224    1.763874
2225    2.251662
2226    1.958121
Name: VOO, Length: 2227, dtype: float64
In [14]:
# We can do the same thing for Amazon Stock
df=stocks_df['AMZN']
daily_returns = df.copy() # pull the values from df, calculate the return, overwrite df_returns

for i in range(1, len(daily_returns)):
    daily_returns[i] = ((df[i] - df[i-1]) / df[i-1]) * 100

daily_returns[0] = 0
print(daily_returns)
0       0.000000
1      -0.849022
2       0.056338
3       2.815157
4      -2.217843
          ...   
2222   -5.445642
2223   -1.043095
2224    1.462148
2225    6.322969
2226    2.494172
Name: AMZN, Length: 2227, dtype: float64
Calculate multiple Stock Daily Returns
In [15]:
# Basically the same idea must be implemented to every column

def daily_returns(data):
    daily_returns = data.copy()
    
    for i in data.columns[1:]: # Loop over the column names
        for j in range(1, len(daily_returns)):
            daily_returns[i][j] = ((data[i][j] - data[i][j-1]) / data[i][j-1]) * 100 # Calculate the return using the previous day's price
        daily_returns[i][0] = 0 # Change the first value of ith column to 0
    
    return daily_returns
In [16]:
df_daily_return = daily_returns(stocks_df)
df_daily_return
Out[16]:
Date AAPL AMZN JPM MGM TSLA VOO VZ
0 2012-01-03 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
1 2012-01-04 0.537453 -0.849022 0.633465 -0.630057 -1.317664 0.171536 -1.308834
2 2012-01-05 1.110220 0.056338 2.088702 0.815231 -2.129195 0.325369 -0.688596
3 2012-01-06 1.045370 2.815157 -0.896883 0.000000 -0.774336 -0.204835 -0.286135
4 2012-01-09 -0.158610 -2.217843 -0.169636 1.707078 1.263471 0.153935 0.104334
... ... ... ... ... ... ... ... ...
2222 2020-10-30 -5.601799 -5.445642 0.895341 -3.381874 -5.547301 -1.124560 0.405222
2223 2020-11-02 -0.082681 -1.043095 2.254181 1.944575 3.213586 1.164029 1.245831
2224 2020-11-03 1.535356 1.462148 3.152124 3.719604 5.840050 1.763874 0.086653
2225 2020-11-04 4.083661 6.322969 -3.055801 -1.425283 -0.688838 2.251662 -0.917747
2226 2020-11-05 3.549371 2.494172 4.089774 1.865667 4.064322 1.958121 1.625308

2227 rows × 8 columns

In [17]:
# Make both static and interactive plots: write the functions for both types of plots

# A function for static plot built using matplotlib
def show_plot(df, title):
    df.plot(x='Date', title=title, linewidth=3, figsize=(15,6))
    plt.grid()
    plt.show()

# Make the static plot
show_plot(df_daily_return, 'Stock Daily Returns')
In [18]:
# Interative plot of daily return: A function
def interactive_plot(df, title):
    # Create a graph object
    fig = px.line(df, title=title)
    
    # Loop over the column names
    for i in df.columns[1:]:
        fig.add_scatter(x=df['Date'], y=df[i], name=i) # Add a line for every stock's returns
    fig.show()
In [19]:
interactive_plot(df_daily_return, 'Daily Returns')